## Import the monthly COVID data
covid = 
  read_csv("./data/covid_data_monthly.csv") %>% 
  mutate(
    # `month` is converted to a number, used to look up its English name in
    # `month.name`, and stored as a factor ordered January..December
    month = ordered(as.factor(month.name[as.numeric(month)]), levels = month.name),
    # year is kept as text; the plots below map it to a discrete colour
    year = as.character(year),
    Confirmed_of_Month = as.numeric(Confirmed_of_Month)
  )
## Rows: 20 Columns: 6
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): month
## dbl (5): year, Confirmed_of_Month, Deaths_of_Month, Recovered_of_Month, Acti...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Monthly confirmed cases, one line per year
ggplot(covid, aes(month, Confirmed_of_Month, colour = year)) + 
  # group by year so each year is drawn as its own line across the months
  geom_line(mapping = aes(group = year)) +
  geom_point(size = 1.5)

## Import the export volumes (one row per product type, month and year)
export_volume_df = 
  read_csv("./data/cleaned data/export_volume_combined.csv") %>% 
  mutate(
    # repair the misspelt "Feburary" level before imposing calendar order;
    # values that are not one of the twelve month names become NA
    month = ordered(
      recode(as.factor(month), "Feburary" = "February"),
      levels = month.name
    ),
    year = as.character(year)
  )
## Rows: 855 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): product_type, month
## dbl (2): year, export_volume
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Total export volume per month, one line per year
export_volume_df %>% 
  group_by(month, year) %>% 
  summarise(sum_of_export = sum(export_volume)) %>% 
  ggplot(mapping = aes(x = month, y = sum_of_export, colour = year)) + 
  geom_line(mapping = aes(group = year)) +
  geom_point(size = 1.5) +
  # turn the month labels vertical
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.

## Import the import volumes (one row per product type, month and year)
import_volume_df = 
  read_csv("./data/cleaned data/import_volume_combined.csv") %>% 
  mutate(
    # repair the misspelt "Feburary" level before imposing calendar order;
    # values that are not one of the twelve month names become NA
    month = ordered(
      recode(as.factor(month), "Feburary" = "February"),
      levels = month.name
    ),
    year = as.character(year)
  )
## Rows: 585 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (2): product_type, month
## dbl (2): year, import_volume
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Total import volume per month, one line per year
import_volume_df %>% 
  group_by(month, year) %>% 
  summarise(sum_of_import = sum(import_volume)) %>% 
  ggplot(mapping = aes(x = month, y = sum_of_import, colour = year)) + 
  geom_line(mapping = aes(group = year)) +
  geom_point(size = 1.5) + 
  # turn the month labels vertical
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
## `summarise()` has grouped output by 'month'. You can override using the `.groups` argument.

## Attach the export and import volumes to the monthly COVID table.
## NOTE(review): both volume tables appear to hold several rows per
## (year, month), so the second join pairs every export row with every import
## row of the same month. Sums and models built on combined_df therefore see
## each value repeated -- confirm this is intended.
combined_df = list(covid, export_volume_df, import_volume_df) %>% 
  # `all = TRUE` is an argument of base::merge(), not of left_join(); it never
  # influenced the join, so it is no longer passed through `...`
  reduce(left_join, by = c("year", "month")) %>%
  janitor::clean_names() %>% 
  # the join suffixes the two clashing product_type columns with .x / .y,
  # which clean_names() turns into _x / _y
  rename(export_product_type = product_type_x,
         import_product_type = product_type_y) 

Export vs. COVID cases

## Simple linear regression of export volume on monthly confirmed cases,
## fitted on every row of combined_df, followed by the model summary.
## NOTE(review): combined_df is built by joining the export and import tables
## on (year, month), so export rows may be repeated once per matching import
## row; that would overstate the sample size used here -- confirm.
export_fit = lm(export_volume ~ confirmed_of_month, data = combined_df)
summary(export_fit)
## 
## Call:
## lm(formula = export_volume ~ confirmed_of_month, data = combined_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1885.0 -1684.6 -1548.5  -417.6 17170.7 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.930e+03  8.917e+01  21.641   <2e-16 ***
## confirmed_of_month -1.344e-07  1.230e-07  -1.093    0.274    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3489 on 4444 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.0002687,  Adjusted R-squared:  4.376e-05 
## F-statistic: 1.195 on 1 and 4444 DF,  p-value: 0.2745
## Fit export_volume ~ confirmed_of_month separately inside each nested group
## and collect the tidied coefficient tables, keeping month and year as labels.
export_df = 
  combined_df %>% 
  na.omit() %>% 
  # NOTE(review): every column outside confirmed_of_month:export_volume stays
  # as a grouping key (not only month and year) -- confirm the grouping is
  # the one intended
  nest(data = confirmed_of_month:export_volume) %>% 
  mutate(
    # na.action used to be handed to map() as `na.omit()`, so it never reached
    # lm(); pass the function itself to lm() instead
    lm_fits = map(data, ~ lm(export_volume ~ confirmed_of_month, data = .x, na.action = na.omit)),
    lm_results = map(lm_fits, broom::tidy)) %>% 
  select(month, year, lm_results) %>% 
  unnest(lm_results) 
  
## Density of the fitted coefficient estimates; rows containing any NA are
## dropped first
na.omit(export_df) %>% 
  ggplot(mapping = aes(x = estimate)) +
  geom_density()

## Coefficient estimates by month, one panel per model term
ggplot(export_df, aes(month, estimate)) + 
  geom_point() + 
  facet_grid(cols = vars(term)) + 
  # turn the month labels vertical
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

## Scatter of the monthly total export volume against confirmed cases, with a
## dashed red linear trend line.
export_plot = combined_df %>% 
  # combined_df repeats each export row once per import row of the same month
  # (both tables are joined on year/month), which inflated the monthly total;
  # collapse back to one row per export record before summing.
  # Assumes a product type has at most one record per month -- TODO confirm.
  distinct(month, year, confirmed_of_month, export_product_type, export_volume) %>% 
  group_by(month, year, confirmed_of_month) %>% 
  summarize(export_sum = sum(export_volume)) %>% 
  ggplot(aes(x = confirmed_of_month, y = export_sum)) +
  geom_point(alpha = 0.5) +
  scale_y_continuous() +
  # FALSE rather than F: F is an ordinary variable that can be reassigned
  geom_smooth(se = FALSE, color = "red", method = "lm", size = 1, linetype = 2) +
  labs(title = "Covid Cases vs. Export Volume", 
       x = "Covid Cases", 
       y = "Export Volume")
## `summarise()` has grouped output by 'month', 'year'. You can override using the `.groups` argument.
## Interactive scatter of export volume against confirmed cases, coloured by
## month, with a hover label showing both values.
combined_df %>% 
  # the year/month join repeats each export row once per matching import row;
  # the copies would be drawn as identical stacked markers and defeat `alpha`
  distinct(month, year, confirmed_of_month, export_product_type, export_volume) %>% 
  mutate(
    # case counts are not dollar amounts, so the label carries no "$"
    text_label = str_c("Confirmed Cases: ", confirmed_of_month, "\nExport Volume: ", export_volume)) %>% 
  plot_ly(
    x = ~ confirmed_of_month, y = ~ export_volume, type = "scatter", mode = "markers", color = ~month, text = ~text_label, alpha = 0.5)

Import vs. COVID cases

## Simple linear regression of import volume on monthly confirmed cases,
## fitted on every row of combined_df, followed by the model summary.
## NOTE(review): combined_df is built by joining the export and import tables
## on (year, month), so import rows may be repeated once per matching export
## row; that would overstate the sample size used here -- confirm.
import_fit = lm(import_volume ~ confirmed_of_month, data = combined_df)
summary(import_fit)
## 
## Call:
## lm(formula = import_volume ~ confirmed_of_month, data = combined_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
##  -427.7  -313.1  -246.3    23.7 10390.0 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         4.413e+02  2.247e+01  19.645   <2e-16 ***
## confirmed_of_month -2.577e-08  3.098e-08  -0.832    0.406    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 878.9 on 4444 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.0001557,  Adjusted R-squared:  -6.929e-05 
## F-statistic: 0.692 on 1 and 4444 DF,  p-value: 0.4055
## Fit import_volume ~ confirmed_of_month separately inside each nested group
## and collect the tidied coefficient tables, keeping month and year as labels.
import_df = 
  combined_df %>% 
  na.omit() %>% 
  # NOTE(review): the nested range confirmed_of_month:import_volume also takes
  # in every column lying between those two, and every column outside it stays
  # as a grouping key -- confirm the grouping is the one intended
  nest(data = confirmed_of_month:import_volume) %>% 
  mutate(
    # na.action used to be handed to map() as `na.omit()`, so it never reached
    # lm(); pass the function itself to lm() instead
    lm_fits = map(data, ~ lm(import_volume ~ confirmed_of_month, data = .x, na.action = na.omit)),
    lm_results = map(lm_fits, broom::tidy)) %>% 
  select(month, year, lm_results) %>% 
  unnest(lm_results) 
  
## Density of the fitted coefficient estimates
ggplot(import_df, mapping = aes(x = estimate)) +
  geom_density()

## Coefficient estimates by month, one panel per model term
ggplot(import_df, aes(month, estimate)) + 
  geom_point() + 
  facet_grid(cols = vars(term)) + 
  # turn the month labels vertical
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

## Scatter of the monthly total import volume against confirmed cases, with a
## dashed red linear trend line.
import_plot = combined_df %>% 
  # combined_df repeats each import row once per export row of the same month
  # (both tables are joined on year/month), which inflated the monthly total;
  # collapse back to one row per import record before summing.
  # Assumes a product type has at most one record per month -- TODO confirm.
  distinct(month, year, confirmed_of_month, import_product_type, import_volume) %>% 
  group_by(month, year, confirmed_of_month) %>% 
  summarize(import_sum = sum(import_volume)) %>% 
  ggplot(aes(x = confirmed_of_month, y = import_sum)) +
  geom_point(alpha = 0.5) +
  scale_y_continuous() +
  # FALSE rather than F: F is an ordinary variable that can be reassigned
  geom_smooth(se = FALSE, color = "red", method = "lm", size = 1, linetype = 2) +
  labs(title = "Covid Cases vs. Import Volume", 
       x = "Covid Cases", 
       y = "Import Volume")
## `summarise()` has grouped output by 'month', 'year'. You can override using the `.groups` argument.
## Interactive scatter of the monthly total import volume against confirmed
## cases, coloured by month, with a hover label showing both values.
combined_df %>% 
  # combined_df repeats each import row once per export row of the same month
  # (both tables are joined on year/month), which inflated the monthly total;
  # collapse back to one row per import record before summing.
  # Assumes a product type has at most one record per month -- TODO confirm.
  distinct(month, year, confirmed_of_month, import_product_type, import_volume) %>% 
  group_by(month, year, confirmed_of_month) %>% 
  summarize(import_sum = sum(import_volume)) %>% 
  mutate(
    # case counts are not dollar amounts, so the label carries no "$"
    text_label = str_c("Confirmed Cases: ", confirmed_of_month, "\nImport Volume: ", import_sum)) %>% 
  plot_ly(
    x = ~ confirmed_of_month, y = ~ import_sum, type = "scatter", mode = "markers", color = ~month, text = ~text_label, alpha = 0.5)
## `summarise()` has grouped output by 'month', 'year'. You can override using the `.groups` argument.
## Arrange the import and export scatter plots in a single figure
ggpubr::ggarrange(plotlist = list(import_plot, export_plot))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'